set.seed(1)
addTimeStamp <- function(saveInfo, tstart, info){
timeDiff <- abs(round(difftime(Sys.time(), tstart, units = "mins"), 3))
print(paste0("Adding Time Stamp : ", info , ", ", timeDiff, " minutes from start..."))
saveInfo$timeStamps[[length(saveInfo$timeStamps) + 1]] <- list(
info = info,
time = timeDiff
)
saveInfo
}
print(Sys.time())
## [1] "2020-02-27 18:15:56 PST"
#Print Input Params
print(params)
## $jobAttempt
## [1] 1
##
## $project
## [1] "All_PBMC_Rep1"
##
## $projectMetadata
## [1] "Project_Metadata.tsv"
##
## $threads
## [1] 20
##
## $out1
## [1] "/scratch/users/jgranja/BenchmarksLargePBMC/Large/outputSmall/Signac/Projects/All_PBMC_Rep1/All_PBMC_Rep1-Signac-1-DataImport-Benchmark.rds"
##
## $out2
## [1] "/scratch/users/jgranja/BenchmarksLargePBMC/Large/outputSmall/Signac/Projects/All_PBMC_Rep1/All_PBMC_Rep1-Signac-1-DataImport-Save.rds"
#Get Project Metadata
df <- data.frame(readr::read_tsv(paste0(params$projectMetadata)))
## Parsed with column specification:
## cols(
## Project = col_character(),
## Metadata = col_character()
## )
#Get Sample Metadata
metadata <- data.frame(readr::read_tsv(df[paste0(df[,1]) == paste0(params$project), 2]))
## Parsed with column specification:
## cols(
## Sample = col_character(),
## Genome = col_character(),
## Path = col_character()
## )
print(metadata)
## Sample Genome
## 1 10k_pbmc Hg19
## 2 frozen_sorted_pbmc_5k Hg19
## 3 fresh_pbmc_5k Hg19
## 4 frozen_unsorted_pbmc_5k Hg19
## 5 pbmc_10k_nextgem Hg19
## 6 pbmc_10k_v1 Hg19
## 7 pbmc_1k_nextgem Hg19
## 8 pbmc_1k_v1 Hg19
## 9 pbmc_500_nextgem Hg19
## 10 pbmc_500_v1 Hg19
## 11 pbmc_5k_nextgem Hg19
## 12 pbmc_5k_v1 Hg19
## 13 scATAC_PBMC_D10T1 Hg19
## 14 scATAC_PBMC_D11T1 Hg19
## 15 scATAC_PBMC_D12T1 Hg19
## 16 scATAC_PBMC_D12T2 Hg19
## 17 scATAC_PBMC_D12T3 Hg19
## Path
## 1 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/10k_pbmc_fragments.tsv.gz
## 2 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/frozen_sorted_pbmc_5k_fragments.tsv.gz
## 3 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/fresh_pbmc_5k_fragments.tsv.gz
## 4 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/frozen_unsorted_pbmc_5k_fragments.tsv.gz
## 5 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_10k_nextgem_fragments.tsv.gz
## 6 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_10k_v1_fragments.tsv.gz
## 7 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_1k_nextgem_fragments.tsv.gz
## 8 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_1k_v1_fragments.tsv.gz
## 9 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_500_nextgem_fragments.tsv.gz
## 10 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_500_v1_fragments.tsv.gz
## 11 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_5k_nextgem_fragments.tsv.gz
## 12 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_5k_v1_fragments.tsv.gz
## 13 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D10T1.fragments.tsv.gz
## 14 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D11T1.fragments.tsv.gz
## 15 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T1.fragments.tsv.gz
## 16 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T2.fragments.tsv.gz
## 17 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T3.fragments.tsv.gz
#Input Files
inputFiles <- paste0(metadata$Path)
names(inputFiles) <- metadata$Sample
print(inputFiles)
## 10k_pbmc
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/10k_pbmc_fragments.tsv.gz"
## frozen_sorted_pbmc_5k
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/frozen_sorted_pbmc_5k_fragments.tsv.gz"
## fresh_pbmc_5k
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/fresh_pbmc_5k_fragments.tsv.gz"
## frozen_unsorted_pbmc_5k
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/frozen_unsorted_pbmc_5k_fragments.tsv.gz"
## pbmc_10k_nextgem
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_10k_nextgem_fragments.tsv.gz"
## pbmc_10k_v1
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_10k_v1_fragments.tsv.gz"
## pbmc_1k_nextgem
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_1k_nextgem_fragments.tsv.gz"
## pbmc_1k_v1
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_1k_v1_fragments.tsv.gz"
## pbmc_500_nextgem
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_500_nextgem_fragments.tsv.gz"
## pbmc_500_v1
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_500_v1_fragments.tsv.gz"
## pbmc_5k_nextgem
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_5k_nextgem_fragments.tsv.gz"
## pbmc_5k_v1
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_5k_v1_fragments.tsv.gz"
## scATAC_PBMC_D10T1
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D10T1.fragments.tsv.gz"
## scATAC_PBMC_D11T1
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D11T1.fragments.tsv.gz"
## scATAC_PBMC_D12T1
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T1.fragments.tsv.gz"
## scATAC_PBMC_D12T2
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T2.fragments.tsv.gz"
## scATAC_PBMC_D12T3
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T3.fragments.tsv.gz"
#Genome
genome <- tolower(paste0(metadata$Genome[1]))
#Working Directory Relative to project
owd <- getwd()
setwd(dirname(paste0(params$out1)))
#Copy from storage to faster more conistent current working directory
for(i in seq_along(inputFiles)){
system(paste0("cp ", inputFiles[i], " ", basename(inputFiles[i])))
}
inputFiles <- basename(inputFiles)
names(inputFiles) <- metadata$Sample
print(inputFiles)
## 10k_pbmc
## "10k_pbmc_fragments.tsv.gz"
## frozen_sorted_pbmc_5k
## "frozen_sorted_pbmc_5k_fragments.tsv.gz"
## fresh_pbmc_5k
## "fresh_pbmc_5k_fragments.tsv.gz"
## frozen_unsorted_pbmc_5k
## "frozen_unsorted_pbmc_5k_fragments.tsv.gz"
## pbmc_10k_nextgem
## "atac_pbmc_10k_nextgem_fragments.tsv.gz"
## pbmc_10k_v1
## "atac_pbmc_10k_v1_fragments.tsv.gz"
## pbmc_1k_nextgem
## "atac_pbmc_1k_nextgem_fragments.tsv.gz"
## pbmc_1k_v1
## "atac_pbmc_1k_v1_fragments.tsv.gz"
## pbmc_500_nextgem
## "atac_pbmc_500_nextgem_fragments.tsv.gz"
## pbmc_500_v1
## "atac_pbmc_500_v1_fragments.tsv.gz"
## pbmc_5k_nextgem
## "atac_pbmc_5k_nextgem_fragments.tsv.gz"
## pbmc_5k_v1
## "atac_pbmc_5k_v1_fragments.tsv.gz"
## scATAC_PBMC_D10T1
## "scATAC_PBMC_D10T1.fragments.tsv.gz"
## scATAC_PBMC_D11T1
## "scATAC_PBMC_D11T1.fragments.tsv.gz"
## scATAC_PBMC_D12T1
## "scATAC_PBMC_D12T1.fragments.tsv.gz"
## scATAC_PBMC_D12T2
## "scATAC_PBMC_D12T2.fragments.tsv.gz"
## scATAC_PBMC_D12T3
## "scATAC_PBMC_D12T3.fragments.tsv.gz"
#Remove any tbi files in case of re-run
o <- suppressWarnings(file.remove(list.files(pattern = ".tbi")))
#Start Time
saveInfo <- list(timeStamps =
list(
list(info = "Start", time = 0)
),
info = list()
)
#Threads based on how many times this job was attempted (killed by using too much memory)
if(params$jobAttempt == 1){
threads <- params$threads
}else if(params$jobAttempt == 2){
threads <- floor(params$threads / 2)
}else{
threads <- 1
}
print(threads)
## [1] 20
#Load R Libraries
library(Signac)
library(Seurat)
library(GenomeInfoDb)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
library(GenomicRanges)
library(ggplot2)
library(future)
##
## Attaching package: 'future'
## The following object is masked from 'package:GenomicRanges':
##
## values
## The following object is masked from 'package:IRanges':
##
## values
## The following object is masked from 'package:S4Vectors':
##
## values
#This is hidden but it looks like it applies to FeatureMatrix
# NOTE : This crashed everytime I used it so we are disabling it.
#Error: Failed to retrieve the result of MulticoreFuture (future_lapply-1) from the forked worker (on
#localhost; PID 168992). Post-mortem diagnostic: No process exists with this PID, i.e. the forked
#localhost worker is no longer alive.
#Error: Failed to retrieve the result of MulticoreFuture (future_lapply-5) from the forked worker (on
#localhost; PID 129250). Post-mortem diagnostic: No process exists with this PID, i.e. the forked
#localhost worker is no longer alive.
#options(future.globals.maxSize = 32 * 1024 ^ 3) #32 GB
#plan("multiprocess", workers = threads)
#plan()
#Get Hematopoiesis Peak Set from Corces et al.
peakFile <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE74nnn/GSE74912/suppl/GSE74912_ATACseq_All_Counts.txt.gz"
features <- readr::read_tsv(peakFile)[,1:3]
## Parsed with column specification:
## cols(
## .default = col_double(),
## Chr = col_character()
## )
## See spec(...) for full column specifications.
features <- GRanges(features$Chr, IRanges(start = features$Start, end = features$End))
print(features)
## GRanges object with 590650 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 10025-10525 *
## [2] chr1 13252-13752 *
## [3] chr1 16019-16519 *
## [4] chr1 96376-96876 *
## [5] chr1 115440-115940 *
## ... ... ... ...
## [590646] chrX 154880741-154881241 *
## [590647] chrX 154891824-154892324 *
## [590648] chrX 154896342-154896842 *
## [590649] chrX 154912441-154912941 *
## [590650] chrX 155259860-155260360 *
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
tstart <- Sys.time()
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("getPeaks"))
## [1] "Adding Time Stamp : getPeaks, 0 minutes from start..."
#For Computing TSS Scores Per Cell
if(!requireNamespace("EnsDb.Hsapiens.v75", quietly = TRUE))
BiocManager::install("EnsDb.Hsapiens.v75")
library(EnsDb.Hsapiens.v75)
## Loading required package: ensembldb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: AnnotationFilter
##
## Attaching package: 'AnnotationFilter'
## The following object is masked from 'package:future':
##
## value
##
## Attaching package: 'ensembldb'
## The following object is masked from 'package:stats':
##
## filter
geneRanges <- genes(EnsDb.Hsapiens.v75)
tssRanges <- GRanges(
seqnames = seqnames(geneRanges),
ranges = IRanges(start = start(geneRanges), width = 2),
strand = strand(geneRanges)
)
seqlevelsStyle(tssRanges) <- 'UCSC'
tssRanges <- keepStandardChromosomes(tssRanges, pruning.mode = 'coarse')
tssRanges1 <- tssRanges[which(seqnames(tssRanges) %in% c("chr1", "chr2", "chr3"))]
print(tssRanges1)
## GRanges object with 12614 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 11869-11870 +
## [2] chr1 14363-14364 -
## [3] chr1 29554-29555 +
## [4] chr1 34554-34555 -
## [5] chr1 52473-52474 +
## ... ... ... ...
## [12610] chr3 197848259-197848260 -
## [12611] chr3 197879237-197879238 +
## [12612] chr3 197923617-197923618 +
## [12613] chr3 197950368-197950369 -
## [12614] chr3 197955065-197955066 +
## -------
## seqinfo: 25 sequences from an unspecified genome; no seqlengths
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("loadGenome"))
## [1] "Adding Time Stamp : loadGenome, 0.051 minutes from start..."
#Process each sample by getting a counts matrix and then TSS scores so that we can filter bad cells properly
seuratOut <- c()
for(i in seq_along(inputFiles)){
print(paste0("Running Sample : ", names(inputFiles)[i]))
#1. Index Tabix
Rsamtools::indexTabix(inputFiles[i], format = "bed")
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("IndexTabix=", names(inputFiles)[i]))
#2. Filter low cell # fragments
fragsPerCell <- table(data.table::fread(inputFiles[i], sep = "\t", select = 4)$V4)
fragsPerCell <- fragsPerCell[fragsPerCell >= 1000]
print(paste0("nCells = ", length(fragsPerCell)))
print(paste0("median frags per cell = ", median(fragsPerCell)))
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("FilterLowBC=", names(inputFiles)[i]))
#3. Create cell x peak matrix
countsMat <- FeatureMatrix(
fragments = inputFiles[i],
features = features,
cells = names(fragsPerCell),
verbose = TRUE
)
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("CellByPeak=", names(inputFiles)[i]))
#4. Create Seurat Object
seuratObj <- CreateSeuratObject(
counts = countsMat,
assay = 'peaks',
project = 'ATAC',
min.cells = 1,
min.features = 1
)
seuratObj$sampleName <- names(inputFiles)[i]
#5. Set Fragments
seuratObj <- SetFragments(
object = seuratObj,
file = inputFiles[i]
)
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("CreateSeurat=", names(inputFiles)[i]))
#6. TSS Scores
#Note the tutorial does 2k TSS, when using downsampled test datasets (which I know the TSS scores and true # pass filter)
#this is too little to get an accurate QC. By default there will be now all TSS on chr1,chr2,chr3 being computed to ensure ok stability.
#Im worried that this score isnt stable because its not the signal at the center of the TSS and when running it at different
#Number of TSS I get different results. Also, this value depends on the number of reads in peaks somehow??
seuratObj <- TSSEnrichment(object = seuratObj, tss.positions = tssRanges1)
print(paste0("median TSS per cell = ", median(seuratObj$TSS.enrichment)))
df <- data.frame(x = log10(seuratObj$nCount_peaks + 1), y = seuratObj$TSS.enrichment)
p <- ggplot(df, aes(x, y)) +
geom_point() + theme_bw() +
ylim(c(0, quantile(df$y, 0.99))) +
xlim(c(quantile(df$x, 0.001), quantile(df$x, 0.99)))
print(p)
#Plot TSS Scores
seuratObj$highTSS <- ifelse(seuratObj$TSS.enrichment >= 2, 'High', 'Low')
p <- TSSPlot(seuratObj, group.by = 'highTSS') + ggtitle(paste0("TSS enrichment score : ", names(inputFiles)[i])) + NoLegend()
print(p)
#7. Filter Cells with TSS greater than 2 (I found this score is very unstable with
#low # of fragment cells but we can at least filter junk)
seuratObj <- subset(seuratObj, subset = TSS.enrichment >= 2 & seuratObj$nCount_peaks >= 500)
print(paste0("nCells pass filter = ", ncol(seuratObj)))
outFile <- paste0(names(inputFiles)[i], ".rds")
saveRDS(seuratObj, outFile)
rm(countsMat)
rm(seuratObj)
gc()
seuratOut <- c(seuratOut, outFile)
}
## [1] "Running Sample : 10k_pbmc"
## [1] "Adding Time Stamp : IndexTabix=10k_pbmc, 1.497 minutes from start..."
## [1] "nCells = 16436"
## [1] "median frags per cell = 7660.5"
## [1] "Adding Time Stamp : FilterLowBC=10k_pbmc, 4.073 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=10k_pbmc, 72.786 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=10k_pbmc, 73.196 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score
## [1] "median TSS per cell = 2.805625"
## Warning: Removed 338 rows containing missing values (geom_point).

## [1] "nCells pass filter = 9928"
## [1] "Running Sample : frozen_sorted_pbmc_5k"
## [1] "Adding Time Stamp : IndexTabix=frozen_sorted_pbmc_5k, 134.823 minutes from start..."
## [1] "nCells = 9837"
## [1] "median frags per cell = 3710"
## [1] "Adding Time Stamp : FilterLowBC=frozen_sorted_pbmc_5k, 135.62 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=frozen_sorted_pbmc_5k, 189.052 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=frozen_sorted_pbmc_5k, 189.217 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 2.814"
## Warning: Removed 207 rows containing missing values (geom_point).

## [1] "nCells pass filter = 6429"
## [1] "Running Sample : fresh_pbmc_5k"
## [1] "Adding Time Stamp : IndexTabix=fresh_pbmc_5k, 212.667 minutes from start..."
## [1] "nCells = 4966"
## [1] "median frags per cell = 7198.5"
## [1] "Adding Time Stamp : FilterLowBC=fresh_pbmc_5k, 213.355 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=fresh_pbmc_5k, 258.335 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=fresh_pbmc_5k, 258.474 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.02647285190714"
## Warning: Removed 105 rows containing missing values (geom_point).

## [1] "nCells pass filter = 4460"
## [1] "Running Sample : frozen_unsorted_pbmc_5k"
## [1] "Adding Time Stamp : IndexTabix=frozen_unsorted_pbmc_5k, 280.235 minutes from start..."
## [1] "nCells = 30433"
## [1] "median frags per cell = 1343"
## [1] "Adding Time Stamp : FilterLowBC=frozen_unsorted_pbmc_5k, 281.211 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=frozen_unsorted_pbmc_5k, 346.535 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=frozen_unsorted_pbmc_5k, 346.707 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 1.206"
## Warning: Removed 591 rows containing missing values (geom_point).

## [1] "nCells pass filter = 5352"
## [1] "Running Sample : pbmc_10k_nextgem"
## [1] "Adding Time Stamp : IndexTabix=pbmc_10k_nextgem, 368.613 minutes from start..."
## [1] "nCells = 13785"
## [1] "median frags per cell = 9784"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_10k_nextgem, 370.425 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_10k_nextgem, 430.118 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_10k_nextgem, 430.466 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 2.94531999999998"
## Warning: Removed 289 rows containing missing values (geom_point).

## [1] "nCells pass filter = 10060"
## [1] "Running Sample : pbmc_10k_v1"
## [1] "Adding Time Stamp : IndexTabix=pbmc_10k_v1, 483.168 minutes from start..."
## [1] "nCells = 9584"
## [1] "median frags per cell = 8861.5"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_10k_v1, 484.211 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_10k_v1, 534.619 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_10k_v1, 534.878 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.05715789473684"
## Warning: Removed 202 rows containing missing values (geom_point).

## [1] "nCells pass filter = 7700"
## [1] "Running Sample : pbmc_1k_nextgem"
## [1] "Adding Time Stamp : IndexTabix=pbmc_1k_nextgem, 568.809 minutes from start..."
## [1] "nCells = 1102"
## [1] "median frags per cell = 14492"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_1k_nextgem, 568.979 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_1k_nextgem, 598.654 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_1k_nextgem, 598.72 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.45349140845072"
## Warning: Removed 26 rows containing missing values (geom_point).

## [1] "nCells pass filter = 1065"
## [1] "Running Sample : pbmc_1k_v1"
## [1] "Adding Time Stamp : IndexTabix=pbmc_1k_v1, 607.576 minutes from start..."
## [1] "nCells = 978"
## [1] "median frags per cell = 8529"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_1k_v1, 607.694 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_1k_v1, 640.84 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_1k_v1, 640.9 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.43303655435472"
## Warning: Removed 21 rows containing missing values (geom_point).

## [1] "nCells pass filter = 889"
## [1] "Running Sample : pbmc_500_nextgem"
## [1] "Adding Time Stamp : IndexTabix=pbmc_500_nextgem, 648.168 minutes from start..."
## [1] "nCells = 548"
## [1] "median frags per cell = 15828.5"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_500_nextgem, 648.258 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_500_nextgem, 669.875 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_500_nextgem, 669.898 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.45889810344829"
## Warning: Removed 13 rows containing missing values (geom_point).

## [1] "nCells pass filter = 527"
## [1] "Running Sample : pbmc_500_v1"
## [1] "Adding Time Stamp : IndexTabix=pbmc_500_v1, 676.091 minutes from start..."
## [1] "nCells = 439"
## [1] "median frags per cell = 10959"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_500_v1, 676.163 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_500_v1, 702.295 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_500_v1, 702.314 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.44822330097086"
## Warning: Removed 11 rows containing missing values (geom_point).

## [1] "nCells pass filter = 419"
## [1] "Running Sample : pbmc_5k_nextgem"
## [1] "Adding Time Stamp : IndexTabix=pbmc_5k_nextgem, 708.436 minutes from start..."
## [1] "nCells = 5278"
## [1] "median frags per cell = 13394"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_5k_nextgem, 709.242 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_5k_nextgem, 753.752 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_5k_nextgem, 753.959 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.25727551382001"
## Warning: Removed 112 rows containing missing values (geom_point).

## [1] "nCells pass filter = 4870"
## [1] "Running Sample : pbmc_5k_v1"
## [1] "Adding Time Stamp : IndexTabix=pbmc_5k_v1, 782.63 minutes from start..."
## [1] "nCells = 4583"
## [1] "median frags per cell = 8591"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_5k_v1, 783.135 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_5k_v1, 826.121 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_5k_v1, 826.272 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.294"
## Warning: Removed 97 rows containing missing values (geom_point).

## [1] "nCells pass filter = 4049"
## [1] "Running Sample : scATAC_PBMC_D10T1"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D10T1, 846.089 minutes from start..."
## [1] "nCells = 13618"
## [1] "median frags per cell = 1416"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D10T1, 846.972 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D10T1, 905.763 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D10T1, 905.885 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 1.206"
## Warning: Removed 288 rows containing missing values (geom_point).

## [1] "nCells pass filter = 3105"
## [1] "Running Sample : scATAC_PBMC_D11T1"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D11T1, 924.331 minutes from start..."
## [1] "nCells = 24945"
## [1] "median frags per cell = 1282"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D11T1, 925.083 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D11T1, 997.564 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D11T1, 997.654 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 1.072"
## Warning: Removed 522 rows containing missing values (geom_point).

## [1] "nCells pass filter = 2667"
## [1] "Running Sample : scATAC_PBMC_D12T1"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D12T1, 1008.652 minutes from start..."
## [1] "nCells = 2103"
## [1] "median frags per cell = 2470"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D12T1, 1008.975 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D12T1, 1047.165 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D12T1, 1047.2 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 1.6884"
## Warning: Removed 47 rows containing missing values (geom_point).

## [1] "nCells pass filter = 870"
## [1] "Running Sample : scATAC_PBMC_D12T2"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D12T2, 1055.693 minutes from start..."
## [1] "nCells = 1598"
## [1] "median frags per cell = 17165.5"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D12T2, 1055.992 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D12T2, 1093.319 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D12T2, 1093.384 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.23970151199164"
## Warning: Removed 34 rows containing missing values (geom_point).

## [1] "nCells pass filter = 1284"
## [1] "Running Sample : scATAC_PBMC_D12T3"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D12T3, 1105.421 minutes from start..."
## [1] "nCells = 2082"
## [1] "median frags per cell = 7006"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D12T3, 1105.707 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D12T3, 1140.511 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D12T3, 1140.597 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.20224736842107"
## Warning: Removed 44 rows containing missing values (geom_point).


## [1] "nCells pass filter = 1857"
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("Completed"))
## [1] "Adding Time Stamp : Completed, 1151.833 minutes from start..."
print(Sys.time() - tstart)
## Time difference of 19.19722 hours
#Set Original Working Directory
#Save Output
seuratOut <- file.path(getwd(), seuratOut)
setwd(owd)
saveRDS(saveInfo, params$out1)
saveRDS(seuratOut, params$out2)
#Print Session Info
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /share/software/user/open/openblas/0.2.19/lib/libopenblasp-r0.2.19.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] EnsDb.Hsapiens.v75_2.99.0 ensembldb_2.10.2
## [3] AnnotationFilter_1.10.0 GenomicFeatures_1.38.2
## [5] AnnotationDbi_1.48.0 Biobase_2.46.0
## [7] future_1.16.0 ggplot2_3.2.1
## [9] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0
## [11] IRanges_2.20.2 S4Vectors_0.24.3
## [13] BiocGenerics_0.32.0 Seurat_3.1.2
## [15] Signac_0.2.2
##
## loaded via a namespace (and not attached):
## [1] sn_1.5-5 plyr_1.8.5
## [3] igraph_1.2.4.2 lazyeval_0.2.2
## [5] splines_3.6.1 BiocParallel_1.20.1
## [7] listenv_0.8.0 TH.data_1.0-10
## [9] digest_0.6.23 htmltools_0.4.0
## [11] gdata_2.18.0 magrittr_1.5
## [13] memoise_1.1.0 BSgenome_1.54.0
## [15] cluster_2.1.0 ROCR_1.0-7
## [17] ggfittext_0.8.1 globals_0.12.5
## [19] Biostrings_2.54.0 readr_1.3.1
## [21] RcppParallel_4.4.4 matrixStats_0.55.0
## [23] R.utils_2.9.2 sandwich_2.5-1
## [25] prettyunits_1.1.1 colorspace_1.4-1
## [27] blob_1.2.1 rappdirs_0.3.1
## [29] ggrepel_0.8.1 xfun_0.12
## [31] dplyr_0.8.4 crayon_1.3.4
## [33] RCurl_1.98-1.1 jsonlite_1.6
## [35] survival_2.44-1.1 zoo_1.8-7
## [37] ape_5.3 glue_1.3.1
## [39] gtable_0.3.0 zlibbioc_1.32.0
## [41] XVector_0.26.0 leiden_0.3.2
## [43] DelayedArray_0.12.2 future.apply_1.4.0
## [45] scales_1.1.0 mvtnorm_1.0-12
## [47] DBI_1.1.0 bibtex_0.4.2.2
## [49] Rcpp_1.0.3 metap_1.3
## [51] plotrix_3.7-7 viridisLite_0.3.0
## [53] progress_1.2.2 reticulate_1.14
## [55] bit_1.1-15.1 rsvd_1.0.2
## [57] SDMTools_1.1-221.2 tsne_0.1-3
## [59] htmlwidgets_1.5.1 httr_1.4.1
## [61] gplots_3.0.1.2 RColorBrewer_1.1-2
## [63] TFisher_0.2.0 ica_1.0-2
## [65] farver_2.0.3 pkgconfig_2.0.3
## [67] XML_3.99-0.3 R.methodsS3_1.7.1
## [69] ggseqlogo_0.1 uwot_0.1.5
## [71] labeling_0.3 tidyselect_1.0.0
## [73] rlang_0.4.4 reshape2_1.4.3
## [75] munsell_0.5.0 tools_3.6.1
## [77] RSQLite_2.2.0 ggridges_0.5.2
## [79] evaluate_0.14 stringr_1.4.0
## [81] yaml_2.2.1 npsurv_0.4-0
## [83] knitr_1.27 bit64_0.9-7
## [85] fitdistrplus_1.0-14 caTools_1.18.0
## [87] purrr_0.3.3 RANN_2.6.1
## [89] pbapply_1.4-2 nlme_3.1-140
## [91] R.oo_1.23.0 biomaRt_2.40.5
## [93] compiler_3.6.1 curl_4.3
## [95] plotly_4.9.1 png_0.1-7
## [97] lsei_1.2-0 tibble_2.1.3
## [99] stringi_1.4.5 lattice_0.20-38
## [101] ProtGenerics_1.18.0 Matrix_1.2-17
## [103] multtest_2.42.0 vctrs_0.2.2
## [105] mutoss_0.1-12 pillar_1.4.3
## [107] lifecycle_0.1.0 Rdpack_0.11-1
## [109] lmtest_0.9-37 RcppAnnoy_0.0.14
## [111] data.table_1.12.8 cowplot_1.0.0
## [113] bitops_1.0-6 irlba_2.3.3
## [115] gbRd_0.4-11 gggenes_0.4.0
## [117] patchwork_1.0.0 rtracklayer_1.46.0
## [119] R6_2.4.1 KernSmooth_2.23-15
## [121] gridExtra_2.3 codetools_0.2-16
## [123] MASS_7.3-51.4 gtools_3.8.1
## [125] assertthat_0.2.1 SummarizedExperiment_1.16.1
## [127] withr_2.1.2 GenomicAlignments_1.22.1
## [129] sctransform_0.2.1 Rsamtools_2.2.1
## [131] mnormt_1.5-5 multcomp_1.4-12
## [133] GenomeInfoDbData_1.2.2 hms_0.5.3
## [135] grid_3.6.1 tidyr_1.0.2
## [137] rmarkdown_2.1 Rtsne_0.15
## [139] numDeriv_2016.8-1.1